* Session setup: pin the Stata version, move to this script's folder, add the
* project ado directory to the search path and open a fresh log.
version 16.1
clear all
cd "MYPATH\derived\make_master_dataset"
adopath + ../../ado/

cap log close

log using "clean.log", replace

* `preliminaries' is not defined in this file; presumably it comes from the ado
* directory added above and sets the DATA / RESULTS / TEMP globals -- TODO confirm.
preliminaries

* Create this script's output folder under each root (best effort: errors such
* as "already exists" are swallowed by -capture-). The folder must be built from
* the loop macro; hard-coding $TEMP here meant the RESULTS folder was never
* created, so the figures sub-folder below could not be created either.
foreach PATH in RESULTS TEMP {
	cap mkdir "${`PATH'}\derived\make_master_dataset"
    if "`PATH'" != "TEMP" cap mkdir "${`PATH'}\derived\make_master_dataset\figures"
}

* Graph fonts for on-screen and PostScript output
graph set window fontface default
graph set ps fontface default
graph set window fontfacemono "Consolas"
graph set ps fontfacemono "Consolas"


program define main
	* Entry point of the do-file: the whole pipeline lives in clean_data.
	clean_data
end


program clean_data
	* Build the cleaned master dataset.
	* Loads $DATA\master_dataset, runs the helper programs defined in this file in
	* a fixed order (later steps use variables created by earlier ones) and saves
	* the result as $DATA\master_dataset_clean.dta.
	use "$DATA\master_dataset", clear
	define_date_preg
	merge_admin
	
	* County: prefer the value that merge_admin renamed to lan_kub, fall back to
	* the lan variable merged in from the admin file; lan_flag records the source.
	di "Define pregnancy county"
	rename lan lan_tax_data
	qui gen lan = lan_kub
	qui gen lan_flag = 1 if !mi(lan)
	replace lan_flag = 2 if mi(lan) & !mi(lan_tax_data)
	label def lan_flag_lab 1 "KUB testing clinic" 2 "Tax data"
	label val lan_flag lan_flag_lab
	replace lan = lan_tax_data if mi(lan)
	codebook lan_flag
	label var lan "County (assigned)"
	
	di "Mother age" 
	define_mother_age
	codebook age_flag
	
	* 0/1 indicators for each test type. did_nipt must be built before
	* nipt_utfort is renamed.
	di  "Testing Vars" 
	qui {
		gen did_kub = 1 * !mi(test_id) 
		gen did_nipt = 1 * (nipt_utfort == 1)
		gen did_amnio = 1 * (did_amnio_kubdata == 1)
		gen did_cv = 1 * (did_cv_kubdata == 1)
		rename nipt_utfort did_nipt_kubdata
		gen did_invasive = 1 * (did_amnio == 1 | did_cv == 1)
		gen subt = 1 * (did_nipt == 1 | did_invasive == 1)
		
	}
	di "define pregnancy outcome"
	define_outcome
	
	di "diagnosed chrom ab"
	diagnosed_chrom_ab
	
	di "other fetal issues"
	other_fetal_issues
	
	di "kub risk var"
	kub_risk_var
	
	di "checks"
	checks 
	
	* Path is quoted (as in the -use- above) so a $DATA root containing spaces
	* does not break the command.
	save "$DATA\master_dataset_clean.dta", replace
end

program define_date_preg
	* Assign a pregnancy date (date_preg) to every row, plus date_preg_flag
	* recording which source was used, then derive month / year / day from it.
	* Sources are tried in order; each later one only fills rows still missing.
	di "Define Pregnancy Date"
	qui {
		gen date_preg = .
		format date_preg %td
		label var date_preg "Pregnancy Date (assigned)"
		* ADD flag for which method we calculated the pregnancy date with
		gen date_preg_flag = . 
		label var date_preg_flag "Method for assigning pregnancy date"
		
		* (280 - Gestational age at KUB testing) + KUB testing date (from KUB) 
		replace date_preg = (280 - ga_vid_dagar) + undersokningsdatum
		
		* Date of last period + 280 (from KUB)
		replace date_preg = sm_datum + 280 if mi(date_preg) & !mi(sm_datum)
		* NOTE(review): the condition below requires mi(date_preg), so this line
		* can only overwrite a missing value with missing, i.e. it changes nothing.
		* It looks like a plausibility filter on sm_datum that never fires --
		* confirm the intended condition before changing it.
		replace date_preg = . if abs(sm_datum - undersokningsdatum) >= 365 & mi(date_preg)
		* NOTE(review): flag 1 is given to rows filled by either of the two KUB
		* methods above, although its value label names only the second one.
		replace date_preg_flag = 1 if !mi(date_preg) 
		
		* birth date (from MFR)
		replace date_preg = birth_date if mi(date_preg)
		replace date_preg_flag = 2 if !mi(date_preg) & mi(date_preg_flag)
		
		* If still missing, assign based on KUB test date. Assume test occurs at end of 
		* 12th week. So birth month is 28 weeks (196 days) after test date.
		replace date_preg = undersokningsdatum + 196 if mi(date_preg)
		replace date_preg_flag = 3 if !mi(date_preg) & mi(date_preg_flag)
		
		label def date_preg_flag_lab 1 "Date of last period + 280 (from KUB)" 2 "birth date (from MFR)" 3 "NT test date + 196 days"
		label val date_preg_flag date_preg_flag_lab 
		
		* check to ensure that there are no pregnancies with multiple birth year/months
		* (sd is missing for single-row pregnancies, which the assert lets through)
		bys pregnancy: egen sd_date_preg = sd(date_preg)
		assert !(sd_date_preg != 0 & !mi(sd_date_preg))
		drop sd_date_preg
	}
	qui gen month = month(date_preg)
	qui label var month "Pregnancy Month (assigned)"
	qui gen year = year(date_preg)
	* Label the year variable itself; this line previously re-labelled month,
	* leaving year without a label.
	qui label var year "Pregnancy Year (assigned)"
	qui gen day = day(date_preg)
	qui label var day "Pregnancy day (assigned)"

end

program merge_admin
	* Merge two admin files onto the data in memory:
	*   1) by lopnr and KUB test year: age and lan, with a 0/1 match indicator
	*      mother_in_tax_data;
	*   2) by lopnr: the mother's year and month of birth (fodelsear, fodelsemanad).
	* Rows that exist only in the using files are not kept.
    di "Merge admin"
	cap drop _merge
    qui {
	   * Free the names lan and year: the first merge brings in its own lan and
	   * matches on a year variable holding the KUB test year.
	   rename lan lan_kub
	   rename year year_expdob
	   gen year = undersokningsdatum_yr
	   merge m:1 lopnr year using "MYPATH\MYPATH.dta", assert(1 2 3) keep(1 3) ///
		 generate(mother_in_tax_data) keepusing(age lan)
	   replace mother_in_tax_data = 1*(mother_in_tax_data == 3)
	   * merge attaches its _merge value label to the generated variable; after
	   * the recode to 0/1 that label would show 1 as "master only (1)", the
	   * opposite of what the indicator means, so detach it.
	   label values mother_in_tax_data .
	   * Restore the original year variable
	   drop year
	   rename year_expdob year
	   sort lopnr, stable
	   merge m:1 lopnr using "MYPATH\MYPATH.dta",  ///
		 assert(1 2 3) keep(1 2 3) keepusing(fodelsear fodelsemanad)
	   destring fodelsear, replace
	   * Show the match table in the log before dropping using-only rows
	   noi tab _merge 
	   drop if _merge == 2
	   drop _merge 
	   label var fodelsear "Mother's year of birth (Bakgrund)"
	   destring fodelsemanad, replace
	   label var fodelsemanad "Mother's month of birth (Bakgrund)"
   }
end

program define_mother_age
	* Assign the mother's age (age) and its source (age_flag), and back out an
	* approximate birth date for the mother from the pregnancy date.
	* Only the codebook and the assert are allowed to write to the log.
	quietly {
		* Keep the merged-in age under its own name
		rename age age_tax_data
		label variable age_tax_data "Age (Tax Data)"

		* Source 1: largest patient_alder_vid_bpu within the pregnancy
		bysort pregnancy: egen age = max(patient_alder_vid_bpu)
		generate age_flag = 1 if !mi(age)

		* Source 2: age at date_preg from year/month of birth, with the day of
		* birth set to the 15th. personage is not a built-in command.
		generate temp_mother_birth = mdy(fodelsemanad, 15, fodelsear)
		format temp_mother_birth %td
		personage temp_mother_birth date_preg, gen(age_temp)
		replace age_flag = 2 if mi(age) & !mi(age_temp)
		replace age = age_temp if mi(age)

		* Mother's birth date implied by the (not yet floored) age, followed by a
		* round-trip check that it reproduces that age
		generate mother_birth_date = date_preg - (age * 365.25)
		format mother_birth_date date_preg %td
		noisily codebook mother_birth_date
		generate date_diff = (date_preg - mother_birth_date) / 365.25
		noisily assert abs(date_diff - age) < 0.02 if !mi(age)
		drop date_diff

		label define age_flag_lab 1 "KUB age" 2 "Age from bakgrund"
		label values age_flag age_flag_lab
		label variable age "Age at BPU (assigned)"
		label variable mother_birth_date "Mother's birth date"

		* Store age in completed years
		replace age = floor(age)
	}
end

program define_outcome
	* Create three 0/1 outcome indicators from birth_flag:
	*   live_birth        - birth_flag is present and is not 2
	*   stillbirth        - birth_flag is 2
	*   termination_clean - birth_flag is missing
	display "Use MFR outcome"
	quietly generate live_birth = cond(!mi(birth_flag) & birth_flag != 2, 1, 0)
	quietly generate stillbirth = cond(birth_flag == 2, 1, 0)
	* Rows without a birth flag must be rows with in_MFR equal to 0
	assert in_MFR == 0 if mi(birth_flag) 
	quietly generate termination_clean = cond(mi(birth_flag), 1, 0)
end

program diagnosed_chrom_ab
	* Create 0/1 flags from baby_diagnosis_birth by substring search:
	*   chrom_ab        - the text contains "Q9"
	*   chrom_ab_detect - the text contains "Q90" or "Q91"
	*   chrom_ab_other  - chrom_ab is 1 and chrom_ab_detect is 0
	display "Assign diagnosed Chromosomal abnormality"

	* Any chrom ab
	quietly generate chrom_ab = cond(strpos(baby_diagnosis_birth, "Q9") > 0, 1, 0)
	label variable chrom_ab "Any diagnosed chrom ab"

	* Subset that the label describes as detectable by NIPT
	quietly generate chrom_ab_detect = cond(strpos(baby_diagnosis_birth, "Q90") > 0 ///
	  | strpos(baby_diagnosis_birth, "Q91") > 0, 1, 0)
	label variable chrom_ab_detect "Chrom ab detectable by NIPT"

	* Remainder: flagged as chrom ab but not in the subset above
	quietly generate chrom_ab_other = cond(chrom_ab == 1 & chrom_ab_detect == 0, 1, 0)
	label variable chrom_ab_other "Chrom ab not detectable by NIPT"
end

program other_fetal_issues
	* Flag other diagnoses found anywhere in baby_diagnosis_birth (substring
	* search) and tabulate preterm live births:
	*   pnatal_issue      - the text contains "P"
	*   pnatal_issue1-12  - the text contains one of the codes in that group
	*   congen_deform     - the text contains "Q0" ... "Q8"
	*   preterm_live      - birth month more than one month before the month
	*                       built from bpsmdat_year / bpsmdat_month, for rows
	*                       with birth_flag 1 or 4
	qui gen pnatal_issue = 1 * (strpos(baby_diagnosis_birth, "P") != 0)
	label var pnatal_issue "Any P ICD code"

	* One entry per group: lab# is the variable label, codes# the three-character
	* codes searched for.
	local lab1    "P00-P04"
	local codes1  "P00 P01 P02 P03 P04"
	local lab2    "P05-P08"
	local codes2  "P05 P06 P07 P08"
	local lab3    "P09"
	local codes3  "P09"
	local lab4    "P10-P15"
	local codes4  "P10 P11 P12 P13 P14 P15"
	local lab5    "P19-P29"
	local codes5  "P19 P20 P21 P22 P23 P24 P25 P26 P27 P28 P29"
	local lab6    "P35-P39"
	local codes6  "P35 P36 P37 P38 P39"
	local lab7    "P50-P61"
	local codes7  "P50 P51 P52 P53 P54 P55 P56 P57 P58 P59 P60 P61"
	local lab8    "P70-P74"
	local codes8  "P70 P71 P72 P73 P74"
	local lab9    "P76-P78"
	local codes9  "P76 P77 P78"
	local lab10   "P80-P83"
	local codes10 "P80 P81 P82 P83"
	local lab11   "P84"
	local codes11 "P84"
	local lab12   "P90-P96"
	local codes12 "P90 P91 P92 P93 P94 P95 P96"

	forvalues g = 1/12 {
		qui gen pnatal_issue`g' = 0
		label var pnatal_issue`g' "`lab`g''"
		foreach code of local codes`g' {
			replace pnatal_issue`g' = 1 if (strpos(baby_diagnosis_birth, "`code'") != 0)
		}
	}

	qui gen congen_deform = 0 // non chrom_ab deformation
	forvalues d = 0/8 {
		replace congen_deform = 1 if (strpos(baby_diagnosis_birth, "Q`d'") != 0)
	}
	
	* Monthly dates: bfoddat is split as year*100 + month
	gen m_year_bpsmdat =ym(bpsmdat_year, bpsmdat_month)
	gen dob = ym(floor(bfoddat/100), mod(bfoddat, 100))
	format m_year_bpsmdat dob %tm
	gen months_early = m_year_bpsmdat - dob  if birth_flag == 1 | birth_flag == 4
	* Stata treats missing as larger than any number, so a bare "> 1" is true
	* whenever either month is missing. months_early is missing in exactly those
	* cases (and for other birth_flag values), so requiring it to be non-missing
	* keeps rows with an unknown birth month from being counted as preterm.
	gen preterm_live = 1 * (months_early > 1 & !mi(months_early))
	tab preterm_live 
	tab months_early if preterm_live == 1
	drop m_year_bpsmdat dob months_early
end

program kub_risk_var
	* Tidy the KUB risk variables: drop fetus_risk1 and fetus_risk2, give
	* fetus_risk a display format and label, and copy the exam date into
	* kub_test_date.
	drop fetus_risk1 fetus_risk2

	label variable fetus_risk "1/KUB risk"
	format %7.0fc fetus_risk

	generate kub_test_date = undersokningsdatum
	label variable kub_test_date "KUB testing date"
end


program checks
	* Consistency checks on the outcome indicators; the first assert that fails
	* stops the run.
	display "termination can't be 1 if live birth is 1"
	assert !(termination_clean == 1) if live_birth == 1

	display "stillbirth can't be 1 if live birth is 1"
	assert !(stillbirth == 1) if live_birth == 1

	display "termination can't be 1 if stillbirth is 1"
	assert !(termination_clean == 1) if stillbirth == 1

	* Stricter form of the first two checks: both must be exactly 0
	display "stillbirth and termination are 0 if live birth == 1"
	assert !(stillbirth != 0 | termination_clean != 0) if live_birth == 1
end

* Execute: run the pipeline, then close the log opened at the top of this
* do-file so it is not left open in the session.

main

log close
